In [13]:
import numpy as np

def compute_depth(order_book_side):
    """Return the total quantity resting on one side of the book.

    ``order_book_side`` is an iterable of ``(price, quantity)`` pairs; the
    price component is ignored. An empty side yields ``0``.
    """
    total = 0
    for _price, quantity in order_book_side:
        total = total + quantity
    return total

def compute_slope(order_book_side):
    """Return the slope between the first two levels of one book side.

    The slope is ``(p1 - p2) / q1``, where ``p1``/``q1`` are the price and
    quantity of the first level and ``p2`` is the price of the second level.

    Returns ``None`` when fewer than two levels are available or when the
    first level's quantity is not strictly positive.
    """
    if len(order_book_side) >= 2:
        (best_price, best_qty), (next_price, _) = order_book_side[0], order_book_side[1]
        if best_qty > 0:
            return (best_price - next_price) / best_qty
    # Either not enough levels or nothing resting at the first level.
    return None

def compute_quantity_weighted_price(order_book_side):
    """Return the quantity-weighted average price of one book side.

    Computes ``sum(p * q) / sum(q)`` over the ``(price, quantity)`` pairs.
    Returns ``None`` when the total quantity is zero (including an empty
    side), since the average is undefined in that case.
    """
    quantities = [quantity for _, quantity in order_book_side]
    total_quantity = sum(quantities)
    if total_quantity == 0:
        return None

    notional = sum(price * quantity for price, quantity in order_book_side)
    return notional / total_quantity

def compute_quantity_weighted_mid_quote(bid_side, ask_side):
    """Return the average of the quantity-weighted bid and ask prices.

    Returns ``None`` if the quantity-weighted price of either side is
    undefined (``compute_quantity_weighted_price`` returned ``None``).
    """
    bid_wp = compute_quantity_weighted_price(bid_side)
    ask_wp = compute_quantity_weighted_price(ask_side)
    if bid_wp is not None and ask_wp is not None:
        return (bid_wp + ask_wp) / 2
    return None

def compute_quantity_weighted_bid_ask_spread(bid_side, ask_side):
    """Return the quantity-weighted ask price minus the quantity-weighted bid price.

    Returns ``None`` if the quantity-weighted price of either side is
    undefined (``compute_quantity_weighted_price`` returned ``None``).
    """
    bid_wp = compute_quantity_weighted_price(bid_side)
    ask_wp = compute_quantity_weighted_price(ask_side)
    if bid_wp is not None and ask_wp is not None:
        return ask_wp - bid_wp
    return None

def compute_mid_quote_difference(mid_quote, bid_side, ask_side):
    """Return ``mid_quote`` minus the quantity-weighted mid-quote of the book.

    Returns ``None`` when the quantity-weighted mid-quote cannot be computed
    for the given sides.
    """
    weighted_mid = compute_quantity_weighted_mid_quote(bid_side, ask_side)
    return None if weighted_mid is None else mid_quote - weighted_mid

def get_order_book(timestamp, df):
    """Return the bid and ask sides of the book at ``timestamp``.

    Rows of ``df`` whose ``'Timestamp'`` equals ``timestamp`` are split by
    ``'Side'`` (``'bid'`` / ``'ask'``). Each side is returned as a list of
    ``(Price, Size)`` tuples: bids ordered from the highest price down, asks
    from the lowest price up.
    """
    snapshot = df.loc[df['Timestamp'] == timestamp]

    def _levels(side, ascending):
        # Keep only the requested side, order it by price, and flatten the
        # two columns into plain (price, size) tuples.
        rows = snapshot.loc[snapshot['Side'] == side, ['Price', 'Size']]
        ordered = rows.sort_values(by='Price', ascending=ascending)
        return list(ordered.itertuples(index=False, name=None))

    bids = _levels('bid', False)
    asks = _levels('ask', True)
    return bids, asks

import pandas as pd

def compute_orderbook_changes(orderbook_df):
    """
    Compute the per-level size changes between consecutive order book snapshots.

    Parameters:
    - orderbook_df: DataFrame with ['Price', 'Size', 'Side', 'Timestamp'].
      It is not modified; the timestamp conversion happens on a copy.

    Returns:
    - DataFrame with columns ['Timestamp', 'Price', 'Side', 'Prev_Size',
      'New_Size', 'Size_Change'], one row per (Price, Side) level whose size
      differs between a snapshot and the one immediately before it. The
      'Timestamp' is that of the later snapshot. A level absent from a
      snapshot counts as size 0. The first snapshot produces no rows because
      it has no predecessor.

    Notes:
    - Rows whose timestamp parses to NaT belong to no snapshot and are ignored.
    - Assumes at most one row per (Timestamp, Price, Side) once exact duplicate
      rows are dropped; conflicting duplicates are not reconciled here.
    """
    # ``assign`` returns a new frame, so the caller's DataFrame (frequently a
    # filtered slice of a larger one) keeps its original 'Timestamp' column.
    book = orderbook_df.assign(Timestamp=pd.to_datetime(orderbook_df['Timestamp']))

    # Sort by timestamp and price for a consistent comparison order.
    book = (
        book.sort_values(by=["Timestamp", "Price"])
        .drop_duplicates()
        .reset_index(drop=True)
    )

    # Split into snapshots once instead of re-filtering the whole frame for
    # every timestamp. Each snapshot is a Series of sizes keyed by (Price, Side).
    snapshots = [
        (timestamp, rows.set_index(['Price', 'Side'])['Size'])
        for timestamp, rows in book.groupby('Timestamp', sort=True)
    ]

    changes = []
    for (_, ob_prev), (t_next, ob_next) in zip(snapshots, snapshots[1:]):
        # fill_value=0 treats a level missing from one snapshot as size 0.
        size_changes = ob_next.subtract(ob_prev, fill_value=0)

        # Keep only the levels that actually changed, with before/after sizes.
        for (price, side), change in size_changes.items():
            if change != 0:
                prev_size = ob_prev.get((price, side), 0)
                new_size = ob_next.get((price, side), 0)
                changes.append([t_next, price, side, prev_size, new_size, change])

    return pd.DataFrame(
        changes,
        columns=['Timestamp', 'Price', 'Side', 'Prev_Size', 'New_Size', 'Size_Change'],
    )

Limit Order Book Metrics

1. Total Depth

The total depth on one side of the order book is the sum of all available quantities:

$ \text{Depth} = \sum_{i} q_i $

where $q_i$ is the quantity available at each price level $i$.

2. Slope of the Order Book

The slope of the bid or ask curve measures how quickly the price changes with respect to quantity:

$ \text{Slope}_{\text{bid}} = \frac{p_{\text{bid},1} - p_{\text{bid},2}}{q_{\text{bid},1}} $

where:

  • $p_{\text{bid},1}$ is the best (highest) bid price,
  • $p_{\text{bid},2}$ is the second-best bid price,
  • $q_{\text{bid},1}$ is the quantity available at $p_{\text{bid},1}$.

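The ask slope is defined analogously, using the best (lowest) ask price, the second-best ask price, and the quantity available at the best ask. Because asks are ordered from the lowest price upward, $p_{\text{ask},1} - p_{\text{ask},2} \le 0$, so the ask slope under this definition is non-positive, while the bid slope is non-negative.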

3. Quantity-Weighted Price

The quantity-weighted price for bids or asks is:

$ \text{WP}_{\text{side}} = \frac{\sum_{i} p_{\text{side},i} \cdot q_{\text{side},i}}{\sum_{i} q_{\text{side},i}} $

where:

  • $p_{\text{side},i}$ is the price at level $i$ on the given side (bid or ask),
  • $q_{\text{side},i}$ is the corresponding quantity.

4. Quantity-Weighted Mid-Quote

The quantity-weighted mid-quote is the average of the quantity-weighted bid and ask prices:

$ \text{WMid} = \frac{\text{WP}_{\text{bid}} + \text{WP}_{\text{ask}}}{2} $

where:

  • $\text{WP}_{\text{bid}}$ is the quantity-weighted bid price,
  • $\text{WP}_{\text{ask}}$ is the quantity-weighted ask price.

5. Quantity-Weighted Bid-Ask Spread

The quantity-weighted bid-ask spread is given by:

$ \text{WSpread} = \text{WP}_{\text{ask}} - \text{WP}_{\text{bid}} $

which represents the difference between the quantity-weighted ask and bid prices.

6. Mid-Quote Difference

The difference between the regular mid-quote and the quantity-weighted mid-quote is:

$ \text{Mid-Quote Difference} = \text{MidQuote} - \text{WMid} $

where $\text{MidQuote}$ is the traditional mid-point between the best bid and ask prices.

In [21]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import json
import warnings
warnings.simplefilter("ignore")

# Load the matchup metadata: a dictionary keyed by "<team1> vs. <team2>" strings.
with open("matchup_details.json", "r", encoding="utf-8") as file:
    matchups = json.load(file)

# Relies on keys splitting on single spaces into [team1, 'vs.', team2, ...],
# i.e. team identifiers that contain no spaces themselves.
matchups_list = [(name.split(' ')[0], name.split(' ')[2]) for name in matchups]

# Keep only the most recent matchup of every team.
# team_indices maps each team to the position of its (single) matchup in
# filtered_matchups, and is kept in sync whenever an entry is removed.
team_indices = {}
filtered_matchups = []

for team1, team2 in matchups_list:
    # Positions of earlier matchups involving either team. Removing from the
    # highest position first keeps the lower positions valid while popping.
    stale_positions = sorted(
        {team_indices[team] for team in (team1, team2) if team in team_indices},
        reverse=True,
    )
    for old_index in stale_positions:
        removed = filtered_matchups.pop(old_index)
        # Forget BOTH members of the removed matchup; leaving the partner
        # behind would make a later lookup pop an unrelated matchup.
        for member in removed:
            team_indices.pop(member, None)
        for key in team_indices:
            if team_indices[key] > old_index:
                team_indices[key] -= 1
    team_indices[team1] = len(filtered_matchups)
    team_indices[team2] = len(filtered_matchups)
    filtered_matchups.append((team1, team2))

id_filtered = [
    matchups[" vs. ".join(pair)]['outcomes']
    for pair in filtered_matchups
    if " vs. ".join(pair) in matchups
]

# The first two filtered matchups are skipped (hard-coded in this analysis;
# the reason is not visible here).
gamelist = [list(outcomes.keys()) for outcomes in id_filtered][2:]

for keys in gamelist:
    team_books = {}
    team_data_changes = {}
    for team in keys:
        df = pd.read_parquet(team + '.parquet')

        # Cache the de-duplicated book once per team so the per-timestamp loop
        # below does not re-read the parquet file for every snapshot. Timestamps
        # are normalised to datetimes so they compare equal to the timestamps
        # produced by compute_orderbook_changes.
        book = df.drop_duplicates()
        team_books[team] = book.assign(Timestamp=pd.to_datetime(book['Timestamp']))

        # Changes are computed per side so that "consecutive snapshots" means
        # consecutive timestamps at which that side has rows.
        bid_df = df[df.Side == 'bid']
        ask_df = df[df.Side == 'ask']
        bid_changes = compute_orderbook_changes(bid_df)
        ask_changes = compute_orderbook_changes(ask_df)
        team_data_changes[team] = pd.concat([bid_changes, ask_changes])

    fig, axes = plt.subplots(1, 2, figsize=(30, 8), sharex=True, sharey=True, facecolor=(1,1,1))

    plotted_teams = keys[:2]  # Only plot top two
    for col, team in enumerate(plotted_teams):
        changes = team_data_changes[team]
        bid_data_changes = changes[changes.Side == 'bid']
        ask_data_changes = changes[changes.Side == 'ask']

        # Size changes: squares for bids, circles for asks, coloured by change.
        axes[col].scatter(
            bid_data_changes["Timestamp"], bid_data_changes["Price"],
            c=bid_data_changes["Size_Change"], cmap="coolwarm", s=200,
            edgecolors="k", marker='s', label="Bid Changes"
        )
        axes[col].scatter(
            ask_data_changes["Timestamp"], ask_data_changes["Price"],
            c=ask_data_changes["Size_Change"], cmap="coolwarm", s=200,
            edgecolors="k", marker='o', label="Ask Changes"
        )

        timestamps = sorted(changes['Timestamp'].unique())
        # Depths are collected alongside the plotted metrics but are not drawn
        # in this figure.
        bid_depths, ask_depths, mid_quotes, bid_ask_spreads = [], [], [], []

        book = team_books[team]
        for timestamp in timestamps:
            bid_side, ask_side = get_order_book(timestamp, book)
            bid_depths.append(compute_depth(bid_side))
            ask_depths.append(compute_depth(ask_side))
            mid_quotes.append(compute_quantity_weighted_mid_quote(bid_side, ask_side))
            bid_ask_spreads.append(compute_quantity_weighted_bid_ask_spread(bid_side, ask_side))

        axes[col].plot(timestamps, mid_quotes, label="Quantity-Weighted Mid-Quote", marker='o', color='black', markersize=10)
        axes[col].plot(timestamps, bid_ask_spreads, label="Quantity-Weighted Bid-Ask Spread", marker='s', color='red', markersize=12)

        axes[col].set_xlabel("Timestamp", fontsize=25)
        axes[col].set_ylabel("Price Level", fontsize=25)
        axes[col].set_title(f"{team} - Bid & Ask Changes", fontsize=35)
        axes[col].tick_params(axis="x", rotation=45)
        axes[col].grid(True)

    # One shared legend, taken from the first panel (both panels use the same labels).
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='lower center', ncol=10, fontsize=20)
    # The figure shows both teams, so the title names the matchup rather than
    # only the team plotted last.
    plt.suptitle(f"Order Book Visualization for {' vs. '.join(plotted_teams)}", fontsize=40)
    plt.tight_layout(rect=[0, 0.1, 1, 1])
    plt.show()